library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Address of the raw World Cup matches CSV file.
myurl <- "https://raw.githubusercontent.com/reisanar/datasets/master/WorldCupMatches.csv"

# Read the CSV from that address into a data frame, then print it.
worldcup <- read.csv(file = myurl)
worldcup

Look at the world_bank_pop dataset

# Print the world_bank_pop dataset (a dataset object, not a function;
# presumably the one shipped with tidyr, attached via tidyverse).
world_bank_pop

Look at the WorldPhones dataset

# Print the WorldPhones dataset; the printed result has one row per year
# and one column per world region.
WorldPhones
##      N.Amer Europe Asia S.Amer Oceania Africa Mid.Amer
## 1951  45939  21574 2876   1815    1646     89      555
## 1956  60423  29990 4708   2568    2366   1411      733
## 1957  64721  32510 5230   2695    2526   1546      773
## 1958  68484  35218 6662   2845    2691   1663      836
## 1959  71799  37598 6856   3000    2868   1769      911
## 1960  76036  40341 8220   3145    3054   1905     1008
## 1961  79831  43173 9053   3338    3224   2005     1076

Take a glimpse of worldcup

# Compact column-by-column overview (name, type, first values) of worldcup.
worldcup %>%
  glimpse()
## Rows: 4,572
## Columns: 20
## $ Year                 <int> 1930, 1930, 1930, 1930, 1930, 1930, 1930, 1930, 1~
## $ Datetime             <chr> "13 Jul 1930 - 15:00 ", "13 Jul 1930 - 15:00 ", "~
## $ Stage                <chr> "Group 1", "Group 4", "Group 2", "Group 3", "Grou~
## $ Stadium              <chr> "Pocitos", "Parque Central", "Parque Central", "P~
## $ City                 <chr> "Montevideo ", "Montevideo ", "Montevideo ", "Mon~
## $ Home.Team.Name       <chr> "France", "USA", "Yugoslavia", "Romania", "Argent~
## $ Home.Team.Goals      <int> 4, 3, 2, 3, 1, 3, 4, 3, 1, 1, 6, 4, 1, 4, 3, 6, 6~
## $ Away.Team.Goals      <int> 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 1, 1~
## $ Away.Team.Name       <chr> "Mexico", "Belgium", "Brazil", "Peru", "France", ~
## $ Win.conditions       <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ",~
## $ Attendance           <int> 4444, 18346, 24059, 2549, 23409, 9249, 18306, 183~
## $ Half.time.Home.Goals <int> 3, 2, 2, 1, 0, 1, 0, 2, 0, 0, 3, 1, 1, 4, 2, 1, 3~
## $ Half.time.Away.Goals <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1~
## $ Referee              <chr> "LOMBARDI Domingo (URU)", "MACIAS Jose (ARG)", "T~
## $ Assistant.1          <chr> "CRISTOPHE Henry (BEL)", "MATEUCCI Francisco (URU~
## $ Assistant.2          <chr> "REGO Gilberto (BRA)", "WARNKEN Alberto (CHI)", "~
## $ RoundID              <int> 201, 201, 201, 201, 201, 201, 201, 201, 201, 201,~
## $ MatchID              <int> 1096, 1090, 1093, 1098, 1085, 1095, 1092, 1097, 1~
## $ Home.Team.Initials   <chr> "FRA", "USA", "YUG", "ROU", "ARG", "CHI", "YUG", ~
## $ Away.Team.Initials   <chr> "MEX", "BEL", "BRA", "PER", "FRA", "MEX", "BOL", ~

Arrange worldcup by Year, Datetime, Stage, and Stadium

# Sort ascending by Year, breaking ties by Datetime, Stage, then Stadium.
worldcup %>%
  arrange(Year, Datetime, Stage, Stadium)

Arrange by Home.Team.Goals and Year in descending order

# Sort by home-team goals, then by year, both in descending order.
# desc() accepts a single vector, so every sort key needs its own desc()
# call; desc(Home.Team.Goals, Year) fails with an "unused argument" error.
worldcup %>%
  arrange(desc(Home.Team.Goals), desc(Year))

Select everything that ends with Goals

# Keep only the columns whose names end in "Goals".
# ends_with("Goals") already matches Home.Team.Goals, Away.Team.Goals,
# Half.time.Home.Goals and Half.time.Away.Goals, in that order, so the
# explicit range and the two extra column names were redundant.
worldcup %>%
  select(ends_with("Goals"))

Create a data frame called worldcup_data

# Data frame holding only the goal columns (names ending in "Goals").
# ends_with("Goals") alone selects all four goal columns, so the explicit
# range and the repeated column names are dropped; `<-` is the idiomatic
# assignment operator.
worldcup_data <- worldcup %>%
  select(ends_with("Goals"))
worldcup_data

Use mutate to add two columns called Total_fh_goals and Total_ht_goals: the first adds Home.Team.Goals and Away.Team.Goals, the second adds Half.time.Home.Goals and Half.time.Away.Goals

# Add two derived columns:
#   Total_fh_goals - home plus away goals
#   Total_ht_goals - home plus away goals at half time
worldcup_data %>%
  mutate(
    Total_fh_goals = Home.Team.Goals + Away.Team.Goals,
    Total_ht_goals = Half.time.Home.Goals + Half.time.Away.Goals
  )

Find the mean of Home.Team.Goals

# Overall mean of home-team goals, ignoring missing values.
worldcup %>%
  summarise(hometeamgoals = mean(Home.Team.Goals, na.rm = TRUE))

Group by Year and compute the count and the average for each year

# Per-year summary: number of rows and average home-team goals.
# Taking mean(Year) inside groups defined by Year only returns the group
# key again, so the average is computed over Home.Team.Goals instead.
# NOTE(review): Home.Team.Goals is inferred from the preceding step as the
# intended variable - confirm.
worldcup %>%
  group_by(Year) %>%
  summarise(
    count = n(),
    avg = mean(Home.Team.Goals, na.rm = TRUE)
  )

List only the first 10 of the Dataset

# Show the first ten rows of the dataset.
worldcup %>%
  head(n = 10)

Filter all the games where Argentina is the home team

# Rows where Argentina is listed as the home team.
worldcup %>%
  filter(Home.Team.Name == "Argentina")

Filter all the games where Argentina is the away team

# Rows where Argentina is listed as the away team; stored, then printed.
argen1 <- filter(worldcup, Away.Team.Name == "Argentina")
argen1

Create a geom_point() to see the Away.Team.Goals scored each year by Argentina as the away team

# Scatter plot of away-team goals by year for Argentina's away games.
ggplot(argen1, aes(x = Year, y = Away.Team.Goals)) +
  geom_point()

Create a data frame named argen that lists only Argentina's home games

# Rows where Argentina is listed as the home team; stored, then printed.
argen <- filter(worldcup, Home.Team.Name == "Argentina")
argen

Filter all attendance greater than 67800

# Rows with attendance strictly above 67800.
filter(worldcup, Attendance > 67800)

Arrange to show the most Home.Team.Goals

# Sort so the rows with the most home-team goals come first.
worldcup %>%
  arrange(desc(Home.Team.Goals))

make a geom_point() with x being Year and y being Home.Team.Goals

# Scatter plot of home-team goals by year for Argentina's home games.
ggplot(argen, aes(x = Year, y = Home.Team.Goals)) +
  geom_point()

filter all Home.Team.Name for Portugal

# Rows where Portugal is listed as the home team; stored, then printed.
portg <- filter(worldcup, Home.Team.Name == "Portugal")
portg

Filter all Away.Team.Name for Portugal

# Rows where Portugal is listed as the away team; stored, then printed.
portg1 <- filter(worldcup, Away.Team.Name == "Portugal")
portg1

Make a geom_point() with the portg1 data frame, with x being Year and y being Away.Team.Goals

# Scatter plot of away-team goals by year for Portugal's away games.
ggplot(portg1, aes(x = Year, y = Away.Team.Goals)) +
  geom_point()

Filter just the Old Trafford Stadium

# Rows played at the stadium named exactly "Old Trafford Stadium".
# NOTE(review): confirm the dataset spells the venue this way; a name that
# does not match exactly yields zero rows.
stad <- filter(worldcup, Stadium == "Old Trafford Stadium")
stad

Create a boxplot with x being Stadium and y being Attendance

# Box plot of attendance for the rows kept in stad.
ggplot(stad, aes(x = Stadium, y = Attendance)) +
  geom_boxplot()

# Rows where Argentina is the home team. Despite the variable name, each
# row of worldcup is a match, not a player.
all_arg_players <- filter(worldcup, Home.Team.Name == "Argentina")
all_arg_players
# Rows where Portugal is the home team. Despite the variable name, each
# row of worldcup is a match, not a player.
all_por_players <- filter(worldcup, Home.Team.Name == "Portugal")
all_por_players
# Argentina's home-team rows from the 1990 tournament.
filter(worldcup, Year == 1990 & Home.Team.Name == "Argentina")

create a geom_point() x being Year and y being Home.Team.Goals

# Scatter plot of home-team goals by year for Portugal's home games.
ggplot(portg, aes(x = Year, y = Home.Team.Goals)) +
  geom_point()

Filter RoundID > 300 and Away.Team.Initials == "ARG"

# Rows with RoundID above 300 where Argentina is the away team.
# RoundID is an integer column, so it is compared with the number 300.
# Comparing with the string "300" coerces RoundID to character and
# compares lexicographically (e.g. "1014" > "300" is FALSE).
worldcup %>%
  filter(RoundID > 300, Away.Team.Initials == "ARG")

# Same filter for Portugal as the away team.
worldcup %>%
  filter(RoundID > 300, Away.Team.Initials == "POR")

Create a geom_point() with x being Year and y being Attendance

# Scatter plot of attendance against year for every row of worldcup.
ggplot(worldcup, aes(x = Year, y = Attendance)) +
  geom_point()
## Warning: Removed 3722 rows containing missing values (geom_point).

Filter Year > 1996 and RoundID > 6000

# Rows after 1996 whose RoundID is above 6000.
filter(worldcup, Year > 1996 & RoundID > 6000)

Create a geom_point() with City as color, Home.Team.Goals as size, and alpha = 0.9

# Attendance against year; point color encodes City, point size encodes
# home-team goals, and a fixed alpha of 0.9 is applied to every point.
ggplot(worldcup, aes(x = Year, y = Attendance)) +
  geom_point(
    aes(color = City, size = Home.Team.Goals),
    alpha = 0.9
  )
## Warning: Removed 3722 rows containing missing values (geom_point).

Which team had the most Home Team Goals?

# Sort by home-team goals, highest first; the answer is in the top row.
arrange(worldcup, desc(Home.Team.Goals))

Which team had the most Away Team Goals?

# Sort by away-team goals, highest first; the answer is in the top row.
arrange(worldcup, desc(Away.Team.Goals))

Make a bar graph for Year

# Bar chart counting the number of rows per year.
ggplot(worldcup, aes(x = Year)) +
  geom_bar()
## Warning: Removed 3720 rows containing non-finite values (stat_count).

Count the number of matches for each Referee

# One row per distinct Referee value, with n = number of rows for it.
count(worldcup, Referee)

Create a histogram for the Attendance

# Histogram of attendance using the default binning.
ggplot(worldcup, aes(x = Attendance)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3722 rows containing non-finite values (stat_bin).

Use a dark green color to draw a geom_point

# Attendance against year, with every point drawn in dark green. The color
# is a constant set outside aes(), not a mapping to a column.
ggplot(data = worldcup) +
  geom_point(aes(x = Year, y = Attendance), color = "darkgreen")
## Warning: Removed 3722 rows containing missing values (geom_point).

Set the title to "Home Team goals" and draw a histogram

# Histogram of home-team goals with green bars outlined in blue, a custom
# title and x-axis label, and the x axis limited to 0-10.
# NOTE(review): the run reports two bars removed as missing; the hard xlim
# presumably clips the bins at both ends (including the 0-goal bin) -
# confirm, and consider coord_cartesian() if those bars should be shown.
ggplot(data = worldcup, mapping = aes(x = Home.Team.Goals)) +
  geom_histogram(fill = "green", color = "blue") +
  labs(title = "Home Team goals", x = "Goals") +
  xlim(0, 10)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3720 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

Create a scatter plot of Home.Team.Goals against Half.time.Home.Goals

# Home-team goals (x) against half-time home-team goals (y).
ggplot(data = worldcup) +
  geom_point(aes(x = Home.Team.Goals, y = Half.time.Home.Goals))
## Warning: Removed 3720 rows containing missing values (geom_point).

Select all columns except Referee

# Keep every column except Referee.
select(worldcup, -Referee)